import requests

from pyquery import PyQuery as pq

import re
import random
import MySQLdb
import time
import sys
import json


'''

db = MySQLdb.connect("localhost", "root", "123456", "yii2", charset='utf8' )

cursor = db.cursor()


insert_row={
    'tel':'18600880450',
    'contact':'沙先生',

    'created_at':time.time(),
    'updated_at':time.time()

}
sql="replace into ganji_job (`tel`,`contact`,`created_at`,`updated_at`)values ('%(tel)s','%(contact)s',%(created_at)d,%(updated_at)d)"%insert_row


try:
   cursor.execute(sql)
   db.commit()
except:
   db.rollback()
   print("errr")


db.close()
'''




# url='http://www.ganji.com/pub/pub.php?act=pub&method=load&cid=11&jobinfo=102135%2C106713&reply=12%3B3672377546%3B2&fbranch=i&domain=bj&is_iframe=1&from=viewFullPhone&source_position=wanted_detail_tel_pub'


'''

url='http://bj.ganji.com/zpjiudiancanyin/'

cookies={'ganji_uuid':'8521712200318470401733',' ganji_xuuid':'5ac67d89-bbc1-4fb9-c53e-0d2a5e94f32d.1541061507145',' xxzl_deviceid':'Ekt1HkxlXnahQSXStFfRk1HxVKHoquZlAL8xs8UG9qbzmlT9gmPYPYfRyGQP%2Bk2L',' lg':'1',' citydomain':'bj',' __utmz':'32156897.1541061516.1.1.utmcsr=bj.ganji.com|utmccn=(referral)|utmcmd=referral|utmcct=/',' WantedListPageScreenType':'1920',' gj_footprint':'%5B%5B%22%5Cu552e%5Cu524d%5Cu5de5%5Cu7a0b%5Cu5e08%22%2C%22%5C%2Fzpsqgongchengshi%5C%2F%22%5D%2C%5B%22%5Cu6280%5Cu5de5%5C%2F%5Cu5de5%5Cu4eba%22%2C%22%5C%2Fzpjigongyibangongren%5C%2F%22%5D%2C%5B%22%5Cu9500%5Cu552e%22%2C%22%5C%2Fzpshichangyingxiao%5C%2F%22%5D%5D',' sscode':'g9IFZ%2BZSH3u9JEjgg93avdJO',' GanjiUserName':'%23t_802660659',' GanjiUserInfo':'%7B%22user_id%22%3A802660659%2C%22email%22%3A%22%22%2C%22username%22%3A%22%23t_802660659%22%2C%22user_name%22%3A%22%23t_802660659%22%2C%22nickname%22%3A%22%22%7D',' bizs':'%5B%5D',' xxzl_smartid':'889ef013f0f1d2c5fe36c611ffdfd1f8',' last_name':'%23t_802660659',' GanjiLoginType':'1',' _wap__utmganji_wap_newCaInfo_V2':'%7B%22ca_n%22%3A%22-%22%2C%22ca_s%22%3A%22self%22%2C%22ca_i%22%3A%22-%22%7D',' __utma':'32156897.696892250.1541061516.1541133153.1541135035.7',' __utmt':'1',' _gl_tracker':'%7B%22ca_source%22%3A%22-%22%2C%22ca_name%22%3A%22-%22%2C%22ca_kw%22%3A%22-%22%2C%22ca_id%22%3A%22-%22%2C%22ca_s%22%3A%22self%22%2C%22ca_n%22%3A%22-%22%2C%22ca_i%22%3A%22-%22%2C%22sid%22%3A30422724802%7D',' GANJISESSID':'4hh2km45ldiqukp9hqsua8uqep',' ganji_login_act':'1541135310722',' __utmb':'32156897.4.10.1541135035',' supercookie':'BQNlAwLjAwH5WQIyLzH2ZQSwZTExAzAwLmLkAGOyBGqxAQEuLJIxL2L2ZQIyAJDkL2D%3D',' __utmc1':'32156897'}

response=requests.get(url,cookies=cookies,verify=False)
html=response.text
doc =pq(html)
aitems=doc('.new-dl-wrapper dl.job-list dt').children('a')
for a in aitems:
    itema=a.items()
    print(itema[0][1])
'''






# dl_a=doc('.con-list-zcon  dt.con-list-zp').children('a').items()
'''

dl_a=doc('.con-list-zcon  dt.con-list-zp a.list-ga').items()
i=0
for each in dl_a:
    i+=1
    print(each)
print(i)

a='abc'
if 'b' not in a or 1:
    print(" not in")
else:
    print(" in")
exit(0)
'''




def get_next_url(lis):
    """Return the ``href`` of the first item in *lis* that has the ``next`` class.

    Every item must provide ``has_class(name)`` and ``attr(name)``.  Items are
    examined in iteration order and scanning stops at the first match.  When
    no item carries the class, an empty string is returned.
    """
    # Lazy generator: ``attr`` is only called on the first matching item.
    matching_hrefs = (link.attr('href') for link in lis if link.has_class("next"))
    return next(matching_hrefs, '')


# Lookup table keyed by short city codes (the same style of prefix as the
# ``bj.`` hostnames used in this file).
# NOTE(review): the meaning of the integer values is not established in this
# file — confirm against whatever consumes this table.
urls = dict(bj=1, sh=2, sz=5)
'''


url='http://bj.ganji.com/zpjiudiancanyin/3687464267x.htm'

response=requests.get(url)
html=response.text

doc =pq(html)
#职位名称
job_name=doc(".module-basic .title-line ").children("p").text()
#职位地址,坐标
job_address_loc=doc('#baidu-map').attr("data-ref")
job_address_loc_dict=json.loads(job_address_loc)
job_address=job_address_loc_dict['address']
job_lonlat=job_address_loc_dict['lnglat']
job_lonlat=re.sub('[a-z]','',job_lonlat,0,re.I)

#公司信息
company=doc('.company-info h3 a').text()

#薪水
salary=doc('.salary-line')
salary="%s%s"%(salary.children('b').text(),salary.children('i').text())

'''


# Fetch the detail page at ``url`` and dump its raw HTML to stdout.
# Only a Referer header is sent; no custom User-Agent is set, so requests
# falls back to its own default one.
url = 'http://bj.pupuwang.com/syzr/details/4941231/'
header = {'Referer': 'http://bj.pupuwang.com/syzr/search/'}

# requests applies no timeout by default; bound the connect and read phases
# (in seconds) so an unresponsive server cannot hang the script forever.
response = requests.get(url, headers=header, timeout=(5, 30))
html = response.text
print(html)




























